Avant de commencer

Faire pointer R vers votre répertoire

setwd("le répertoire de mes données")
#knitr::opts_knit$set(root.dir = "~/Documents/GitHub/m1act_Spring21")
datadir <- "~/Documents/GitHub/m1act_Spring21/data"
exportdir <- "~/Documents/GitHub/m1act_Spring21/export"
graphdir <- "~/Documents/GitHub/m1act_Spring21/graph"

(Installer et) charger la librairie faraway

library(faraway)

Export et import d’objets R

aaa=rnorm(100)
bbb=rpois(100,1)
save(aaa,bbb,file=paste(exportdir,"simulations.RData",sep="/"))
rm(aaa,bbb)
try(aaa)
## Error in try(aaa) : objet 'aaa' introuvable
try(bbb)
## Error in try(bbb) : objet 'bbb' introuvable
load(paste(exportdir,"simulations.RData",sep="/"))
aaa
##   [1] -0.79792037 -1.35205511 -0.46179963 -0.68983088 -0.13872252  0.18047802
##   [7] -0.10756321 -1.18750873 -0.07161173 -0.02386442  1.08527104  0.76773347
##  [13]  0.76536340 -0.01015083  0.64901117 -1.20631499 -0.38339427 -0.17775690
##  [19] -0.26530133 -0.62058024 -0.92450352  0.71042069  0.56955588  0.37994659
##  [25]  0.06844525  2.33159470  1.08812293 -0.42940056 -0.47727741 -0.09170656
##  [31]  0.48963019 -1.26263619  1.38309821 -0.30826147  0.59543564 -0.19066936
##  [37]  0.73921247  0.06444115  1.11053499  1.67359603  1.41414694  0.29158559
##  [43]  0.40198130  1.51127338  0.35719992  0.15052745  0.15265481 -1.29622113
##  [49]  2.17128280 -1.92457874 -0.40288378 -0.88704645  0.60709760  0.38807484
##  [55]  0.07504905 -0.06725234  0.26877589 -0.16980368  0.15208394  0.08906470
##  [61] -0.09167668  0.33553181 -1.14833223 -0.52951993  1.04243243 -0.49744890
##  [67]  0.02898057 -0.34079127 -1.29743458  0.81369288  0.80853853  0.12815465
##  [73]  1.09280360 -0.05303212  0.74557702 -0.30766340 -0.19869087  0.65443750
##  [79] -0.88219742  1.17845145 -0.02520588  0.80981895 -0.10633053  0.13901020
##  [85]  0.15443569 -0.09941498  0.98425220  2.15821178  0.30634940  0.36420878
##  [91] -0.17358829 -0.77965566  0.52828412  1.01428117 -1.05588486  1.10118274
##  [97] -2.18495551  2.09507661  1.56248767 -1.98624472
bbb
##   [1] 3 0 2 0 2 1 2 1 3 2 1 1 0 0 1 1 3 1 1 2 1 2 0 0 1 1 1 2 0 0 1 2 0 1 1 1 0
##  [38] 1 0 2 1 1 0 1 2 1 1 2 3 3 1 1 1 4 2 0 0 0 0 2 1 0 0 2 1 0 1 0 2 1 0 0 0 0
##  [75] 1 0 0 0 0 0 0 0 1 1 3 1 0 0 1 2 0 1 1 2 1 1 1 1 1 1

données “vulnerability”

load(paste(datadir,"vulnerability.RData",sep="/"))
#attach(vul)
#country_name
#detach(vul)
with(vul, country_name)
##   [1] Albania                       Algeria                      
##   [3] Angola                        Argentina                    
##   [5] Armenia                       Australia                    
##   [7] Austria                       Azerbaijan                   
##   [9] Bahamas                       Bangladesh                   
##  [11] Belarus                       Belgium                      
##  [13] Belize                        Benin                        
##  [15] Bhutan                        Bolivia                      
##  [17] Bosnia-Hercegovenia           Botswana                     
##  [19] Brazil                        Bulgaria                     
##  [21] Burkina Faso                  Burundi                      
##  [23] Cambodia                      Cameroon                     
##  [25] Canada                        Central African Rep          
##  [27] Chad                          Chile                        
##  [29] China P Rep                   Colombia                     
##  [31] Congo                         Costa Rica                   
##  [33] Cote d'Ivoire                 Croatia                      
##  [35] Cuba                          Czech Rep                    
##  [37] Dominican Rep                 Ecuador                      
##  [39] Egypt                         El Salvador                  
##  [41] Eritrea                       Ethiopia                     
##  [43] Fiji                          France                       
##  [45] Gambia The                    Georgia                      
##  [47] Germany                       Ghana                        
##  [49] Greece                        Grenada                      
##  [51] Guatemala                     Guinea                       
##  [53] Guinea Bissau                 Guyana                       
##  [55] Haiti                         Honduras                     
##  [57] Hong Kong (China)             Hungary                      
##  [59] India                         Indonesia                    
##  [61] Iran Islam Rep                Ireland                      
##  [63] Israel                        Italy                        
##  [65] Jamaica                       Japan                        
##  [67] Jordan                        Kazakhstan                   
##  [69] Kenya                         Korea Rep                    
##  [71] Kyrgyzstan                    Lao P Dem Rep                
##  [73] Lebanon                       Lesotho                      
##  [75] Lithuania                     Macedonia FRY                
##  [77] Madagascar                    Malawi                       
##  [79] Malaysia                      Mali                         
##  [81] Mauritania                    Mauritius                    
##  [83] Mexico                        Moldova Rep                  
##  [85] Mongolia                      Morocco                      
##  [87] Mozambique                    Myanmar                      
##  [89] Namibia                       Nepal                        
##  [91] Netherlands                   New Zealand                  
##  [93] Nicaragua                     Niger                        
##  [95] Nigeria                       Norway                       
##  [97] Oman                          Pakistan                     
##  [99] Panama                        Papua New Guinea             
## [101] Paraguay                      Peru                         
## [103] Philippines                   Poland                       
## [105] Portugal                      Romania                      
## [107] Russia                        Rwanda                       
## [109] Samoa                         Saudi Arabia                 
## [111] Senegal                       Sierra Leone                 
## [113] Slovakia                      South Africa                 
## [115] Spain                         Sri Lanka                    
## [117] St Lucia                      St Vincent and The Grenadines
## [119] Sudan                         Suriname                     
## [121] Swaziland                     Switzerland                  
## [123] Syrian Arab Rep               Tajikistan                   
## [125] Tanzania Uni Rep              Thailand                     
## [127] Timor-Leste                   Togo                         
## [129] Tonga                         Trinidad and Tobago          
## [131] Tunisia                       Turkey                       
## [133] Uganda                        Ukraine                      
## [135] United Kingdom                United States                
## [137] Uruguay                       Vanuatu                      
## [139] Venezuela                     Viet Nam                     
## [141] Yemen                         Zaire/Congo Dem Rep          
## [143] Zambia                        Zimbabwe                     
## 144 Levels: Albania Algeria Angola Argentina Armenia Australia ... Zimbabwe
summary(vul)
##     country_name     ln_urb        ln_events         ln_fert      
##  Albania  :  1   Min.   :2.182   Min.   :0.6931   Min.   :0.3716  
##  Algeria  :  1   1st Qu.:3.421   1st Qu.:2.0794   1st Qu.:0.9507  
##  Angola   :  1   Median :3.911   Median :2.8332   Median :1.4585  
##  Argentina:  1   Mean   :3.773   Mean   :2.7752   Mean   :1.3283  
##  Armenia  :  1   3rd Qu.:4.190   3rd Qu.:3.3758   3rd Qu.:1.7047  
##  Australia:  1   Max.   :4.588   Max.   :5.9480   Max.   :2.0477  
##  (Other)  :138                                                    
##       hdi             ln_pop        ln_death_risk       death_risk       
##  Min.   :0.3350   Min.   : 0.5878   Min.   :-4.3920   Min.   :  0.01238  
##  1st Qu.:0.5321   1st Qu.: 4.2487   1st Qu.:-1.3588   1st Qu.:  0.25697  
##  Median :0.7360   Median : 5.2097   Median :-0.3045   Median :  0.73753  
##  Mean   :0.6887   Mean   : 5.1372   Mean   :-0.1689   Mean   :  5.62025  
##  3rd Qu.:0.8034   3rd Qu.: 6.2394   3rd Qu.: 0.7831   3rd Qu.:  2.18859  
##  Max.   :0.9530   Max.   :10.0206   Max.   : 5.1184   Max.   :167.07002  
## 

scatterplot

with(vul,pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop,  main="Simple Scatterplot Matrix"))

if(!("GGally" %in% rownames(installed.packages()))){install.packages("GGally")}
library(GGally)
## Warning: package 'ggplot2' was built under R version 4.0.2
with(vul, ggpairs(vul[,c("ln_death_risk","ln_events","ln_fert","hdi","ln_pop")]))

Modèle avec 1 covariable

fit_univ = lm(ln_death_risk~ln_events,data=vul)
print(fit_univ)
## 
## Call:
## lm(formula = ln_death_risk ~ ln_events, data = vul)
## 
## Coefficients:
## (Intercept)    ln_events  
##     -1.6048       0.5174
str(fit_univ)
## List of 12
##  $ coefficients : Named num [1:2] -1.605 0.517
##   ..- attr(*, "names")= chr [1:2] "(Intercept)" "ln_events"
##  $ residuals    : Named num [1:144] -0.297 0.692 0.254 -1.381 -1.48 ...
##   ..- attr(*, "names")= chr [1:144] "1" "2" "3" "4" ...
##  $ effects      : Named num [1:144] 2.03 -6.06 0.26 -1.41 -1.38 ...
##   ..- attr(*, "names")= chr [1:144] "(Intercept)" "ln_events" "" "" ...
##  $ rank         : int 2
##  $ fitted.values: Named num [1:144] -0.4135 0.2042 -0.0296 0.2772 -0.8875 ...
##   ..- attr(*, "names")= chr [1:144] "1" "2" "3" "4" ...
##  $ assign       : int [1:2] 0 1
##  $ qr           :List of 5
##   ..$ qr   : num [1:144, 1:2] -12 0.0833 0.0833 0.0833 0.0833 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:144] "1" "2" "3" "4" ...
##   .. .. ..$ : chr [1:2] "(Intercept)" "ln_events"
##   .. ..- attr(*, "assign")= int [1:2] 0 1
##   ..$ qraux: num [1:2] 1.08 1.06
##   ..$ pivot: int [1:2] 1 2
##   ..$ tol  : num 1e-07
##   ..$ rank : int 2
##   ..- attr(*, "class")= chr "qr"
##  $ df.residual  : int 142
##  $ xlevels      : Named list()
##  $ call         : language lm(formula = ln_death_risk ~ ln_events, data = vul)
##  $ terms        :Classes 'terms', 'formula'  language ln_death_risk ~ ln_events
##   .. ..- attr(*, "variables")= language list(ln_death_risk, ln_events)
##   .. ..- attr(*, "factors")= int [1:2, 1] 0 1
##   .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. ..$ : chr [1:2] "ln_death_risk" "ln_events"
##   .. .. .. ..$ : chr "ln_events"
##   .. ..- attr(*, "term.labels")= chr "ln_events"
##   .. ..- attr(*, "order")= int 1
##   .. ..- attr(*, "intercept")= int 1
##   .. ..- attr(*, "response")= int 1
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. ..- attr(*, "predvars")= language list(ln_death_risk, ln_events)
##   .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
##   .. .. ..- attr(*, "names")= chr [1:2] "ln_death_risk" "ln_events"
##  $ model        :'data.frame':   144 obs. of  2 variables:
##   ..$ ln_death_risk: num [1:144] -0.71 0.896 0.225 -1.104 -2.367 ...
##   ..$ ln_events    : num [1:144] 2.3 3.5 3.04 3.64 1.39 ...
##   ..- attr(*, "terms")=Classes 'terms', 'formula'  language ln_death_risk ~ ln_events
##   .. .. ..- attr(*, "variables")= language list(ln_death_risk, ln_events)
##   .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
##   .. .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. .. ..$ : chr [1:2] "ln_death_risk" "ln_events"
##   .. .. .. .. ..$ : chr "ln_events"
##   .. .. ..- attr(*, "term.labels")= chr "ln_events"
##   .. .. ..- attr(*, "order")= int 1
##   .. .. ..- attr(*, "intercept")= int 1
##   .. .. ..- attr(*, "response")= int 1
##   .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. .. ..- attr(*, "predvars")= language list(ln_death_risk, ln_events)
##   .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
##   .. .. .. ..- attr(*, "names")= chr [1:2] "ln_death_risk" "ln_events"
##  - attr(*, "class")= chr "lm"
lm(ln_death_risk~ln_events,data=vul)$x
## named list()
lm.X <- lm(ln_death_risk~ln_events,data=vul,x=TRUE,y=TRUE)$x
lm.y <- lm(ln_death_risk~ln_events,data=vul,x=TRUE,y=TRUE)$y
all(zapsmall(with(vul,ln_death_risk)-lm.y))
## Warning in all(zapsmall(with(vul, ln_death_risk) - lm.y)): conversion
## automatique d'un argument de type 'double' en booléen (logical)
## [1] FALSE
head(lm.X)
##   (Intercept) ln_events
## 1           1  2.302585
## 2           1  3.496508
## 3           1  3.044523
## 4           1  3.637586
## 5           1  1.386294
## 6           1  4.394449
head(lm.y)
##          1          2          3          4          5          6 
## -0.7102835  0.8961845  0.2246880 -1.1036180 -2.3671240 -1.0504330
betahat=MASS::ginv(t(lm.X)%*%lm.X)%*%t(lm.X)%*%lm.y
betahat
##            [,1]
## [1,] -1.6047775
## [2,]  0.5173794
fit_univ$coefficients
## (Intercept)   ln_events 
##  -1.6047775   0.5173794
(summary(fit_univ)$coefficients)[,"Std. Error"]
## (Intercept)   ln_events 
##   0.4220358   0.1434476
#diag(sigma^2 (X'X)^{-1})
#CMR
sigma2hat <- anova(fit_univ)["Residuals","Mean Sq"]
sigma2hat*MASS::ginv(t(lm.X)%*%lm.X)
##             [,1]        [,2]
## [1,]  0.17811425 -0.05710631
## [2,] -0.05710631  0.02057723
diag(sigma2hat*MASS::ginv(t(lm.X)%*%lm.X))
## [1] 0.17811425 0.02057723
((summary(fit_univ)$coefficients)[,"Std. Error"])^2
## (Intercept)   ln_events 
##  0.17811425  0.02057723
#sum(Yhat-Yobs)^2/(n-rg(X))
head(fit_univ$fitted.values)
##           1           2           3           4           5           6 
## -0.41346753  0.20424358 -0.02960412  0.27723443 -0.88753758  0.66881972
head(fitted(fit_univ))
##           1           2           3           4           5           6 
## -0.41346753  0.20424358 -0.02960412  0.27723443 -0.88753758  0.66881972
head(predict(fit_univ))
##           1           2           3           4           5           6 
## -0.41346753  0.20424358 -0.02960412  0.27723443 -0.88753758  0.66881972
SCRes = sum((fitted(fit_univ)-lm.y)^2)
SCRes
## [1] 401.4299
anova(fit_univ)["Residuals","Sum Sq"]
## [1] 401.4299
SCRes/(length(fit_univ$residuals)-fit_univ$rank)
## [1] 2.826971
anova(fit_univ)["Residuals","Mean Sq"]
## [1] 2.826971

Comment * obtenir obtenir une valeur ajustée ? * calculer un IC autour de la moyenne de la régression ? * calculer un IP pour une nouvelle valeur ?

Création d’un data.frame qui contient la ou les valeurs pour lesquels nous voulons une prédictions, un IC ou un IP.

Ajuste le LM de ln_death_risk par rapport à toutes les autres variables. Attention au symbole . dans l’écriture, il complique l’utilisation de predict dans la suite.

lm(ln_death_risk~.,data=vul)
## 
## Call:
## lm(formula = ln_death_risk ~ ., data = vul)
## 
## Coefficients:
##                               (Intercept)  
##                                  -0.71028  
##                       country_nameAlgeria  
##                                   1.60647  
##                        country_nameAngola  
##                                   0.93497  
##                     country_nameArgentina  
##                                  -0.39333  
##                       country_nameArmenia  
##                                  -1.65684  
##                     country_nameAustralia  
##                                  -0.34015  
##                       country_nameAustria  
##                                  -0.69708  
##                    country_nameAzerbaijan  
##                                  -1.47436  
##                       country_nameBahamas  
##                                   2.03204  
##                    country_nameBangladesh  
##                                   4.82158  
##                       country_nameBelarus  
##                                  -2.50897  
##                       country_nameBelgium  
##                                  -0.98479  
##                        country_nameBelize  
##                                   3.29879  
##                         country_nameBenin  
##                                  -0.34731  
##                        country_nameBhutan  
##                                   3.87092  
##                       country_nameBolivia  
##                                   1.78249  
##           country_nameBosnia-Hercegovenia  
##                                  -2.14523  
##                      country_nameBotswana  
##                                   0.47619  
##                        country_nameBrazil  
##                                  -0.39255  
##                      country_nameBulgaria  
##                                  -0.25609  
##                  country_nameBurkina Faso  
##                                  -0.32023  
##                       country_nameBurundi  
##                                   0.92560  
##                      country_nameCambodia  
##                                   2.32582  
##                      country_nameCameroon  
##                                  -0.63194  
##                        country_nameCanada  
##                                  -0.97599  
##           country_nameCentral African Rep  
##                                  -0.80996  
##                          country_nameChad  
##                                   1.02454  
##                         country_nameChile  
##                                   0.73292  
##                   country_nameChina P Rep  
##                                   1.12384  
##                      country_nameColombia  
##                                   1.13679  
##                         country_nameCongo  
##                                  -1.14948  
##                    country_nameCosta Rica  
##                                   1.33603  
##                 country_nameCote d'Ivoire  
##                                  -1.64381  
##                       country_nameCroatia  
##                                  -3.00085  
##                          country_nameCuba  
##                                   0.22253  
##                     country_nameCzech Rep  
##                                  -0.26473  
##                 country_nameDominican Rep  
##                                   2.85372  
##                       country_nameEcuador  
##                                   1.44158  
##                         country_nameEgypt  
##                                   0.13329  
##                   country_nameEl Salvador  
##                                   2.43969  
##                       country_nameEritrea  
##                                  -2.48156  
##                      country_nameEthiopia  
##                                   1.07948  
##                          country_nameFiji  
##                                   2.83539  
##                        country_nameFrance  
##                                  -0.31220  
##                    country_nameGambia The  
##                                   1.55045  
##                       country_nameGeorgia  
##                                  -1.50327  
##                       country_nameGermany  
##                                  -1.12248  
##                         country_nameGhana  
##                                   0.42534  
##                        country_nameGreece  
##                                  -0.65955  
##                       country_nameGrenada  
##                                   3.81138  
##                     country_nameGuatemala  
##                                   3.00251  
##                        country_nameGuinea  
##                                  -1.04987  
##                 country_nameGuinea Bissau  
##                                  -1.44148  
##                        country_nameGuyana  
##                                   1.70295  
##                         country_nameHaiti  
##                                   4.57122  
##                      country_nameHonduras  
##                                   5.64852  
##             country_nameHong Kong (China)  
##                                   0.75312  
##                       country_nameHungary  
##                                  -0.32554  
##                         country_nameIndia  
##                                   1.53890  
##                     country_nameIndonesia  
##                                   0.78556  
##                country_nameIran Islam Rep  
##                                   1.29907  
##                       country_nameIreland  
##                                  -0.65920  
##                        country_nameIsrael  
##                                  -0.85106  
##                         country_nameItaly  
##                                  -1.03665  
##                       country_nameJamaica  
##                                   1.12436  
##                         country_nameJapan  
##                                  -0.02259  
##                        country_nameJordan  
##                                  -0.49753  
##                    country_nameKazakhstan  
##                                  -0.07571  
##                         country_nameKenya  
##                                   1.22299  
##                     country_nameKorea Rep  
##                                   1.43912  
##                    country_nameKyrgyzstan  
##                                  -1.67924  
##                 country_nameLao P Dem Rep  
##                                   0.90221  
##                       country_nameLebanon  
##                                  -0.28543  
##                       country_nameLesotho  
##                                   0.34927  
##                     country_nameLithuania  
##                                  -1.31896  
##                 country_nameMacedonia FRY  
##                                  -1.74645  
##                    country_nameMadagascar  
##                                   2.39586  
##                        country_nameMalawi  
##                                   2.34095  
##                      country_nameMalaysia  
##                                   0.71044  
##                          country_nameMali  
##                                  -0.76567  
##                    country_nameMauritania  
##                                   0.72398  
##                     country_nameMauritius  
##                                  -0.01242  
##                        country_nameMexico  
##                                   1.06316  
##                   country_nameMoldova Rep  
##                                   0.59083  
##                      country_nameMongolia  
##                                   1.97763  
##                       country_nameMorocco  
##                                   1.43654  
##                    country_nameMozambique  
##                                   2.11208  
##                       country_nameMyanmar  
##                                   5.82870  
##                       country_nameNamibia  
##                                   0.69992  
##                         country_nameNepal  
##                                   2.74827  
##                   country_nameNetherlands  
##                                  -1.38592  
##                   country_nameNew Zealand  
##                                  -1.24227  
##                     country_nameNicaragua  
##                                   4.42764  
##                         country_nameNiger  
##                                  -0.01850  
##                       country_nameNigeria  
##                                  -0.41541  
##                        country_nameNorway  
##                                  -3.68169  
##                          country_nameOman  
##                                   1.75716  
##                      country_namePakistan  
##                                   1.83876  
##                        country_namePanama  
##                                   0.95226  
##              country_namePapua New Guinea  
##                                   1.75926  
##                      country_nameParaguay  
##                                   0.86582  
##                          country_namePeru  
##                                   1.52230  
##                   country_namePhilippines  
##                                   3.21954  
##                        country_namePoland  
##                                  -0.95817  
##                      country_namePortugal  
##                                  -0.55522  
##                       country_nameRomania  
##                                   0.83515  
##                        country_nameRussia  
##                                  -0.51183  
##                        country_nameRwanda  
##                                   0.41185  
##                         country_nameSamoa  
##                                   2.86334  
##                  country_nameSaudi Arabia  
##                                  -0.60936  
##                       country_nameSenegal  
##                                   0.91395  
##                  country_nameSierra Leone  
##                                   0.19639  
##                      country_nameSlovakia  
##                                   0.28495  
##                  country_nameSouth Africa  
##                                   0.25514  
##                         country_nameSpain  
##                                  -1.09686  
##                     country_nameSri Lanka  
##                                   1.03989  
##                      country_nameSt Lucia  
##                                   2.66201  
## country_nameSt Vincent and The Grenadines  
##                                   2.06841  
##                         country_nameSudan  
##                                   0.66015  
##                      country_nameSuriname  
##                                   0.16024  
##                     country_nameSwaziland  
##                                  -2.16891  
##                   country_nameSwitzerland  
##                                  -0.71967  
##               country_nameSyrian Arab Rep  
##                                  -1.36322  
##                    country_nameTajikistan  
##                                   3.33479  
##              country_nameTanzania Uni Rep  
##                                   0.59007  
##                      country_nameThailand  
##                                   1.22816  
##                   country_nameTimor-Leste  
##                                  -2.20568  
##                          country_nameTogo  
##                                  -0.09997  
##                         country_nameTonga  
##                                   0.12250  
##           country_nameTrinidad and Tobago  
##                                  -0.60702  
##                       country_nameTunisia  
##                                  -0.23412  
##                        country_nameTurkey  
##                                  -0.17785  
##                        country_nameUganda  
##                                   0.76894  
##                       country_nameUkraine  
##                                  -1.95883  
##                country_nameUnited Kingdom  
##                                  -0.64498  
##                 country_nameUnited States  
##                                   0.78660  
##                       country_nameUruguay  
##                                  -0.70649  
##                       country_nameVanuatu  
##                                   3.26418  
##                     country_nameVenezuela  
##                                   4.95600  
##                      country_nameViet Nam  
##                                   2.68397  
##                         country_nameYemen  
##                                   1.48377  
##           country_nameZaire/Congo Dem Rep  
##                                  -0.60716  
##                        country_nameZambia  
##                                  -1.73928  
##                      country_nameZimbabwe  
##                                   0.39979  
##                                    ln_urb  
##                                        NA  
##                                 ln_events  
##                                        NA  
##                                   ln_fert  
##                                        NA  
##                                       hdi  
##                                        NA  
##                                    ln_pop  
##                                        NA  
##                                death_risk  
##                                        NA
newdata=data.frame(ln_events=3.4)
newdata
##   ln_events
## 1       3.4
pred=predict(fit_univ,newdata,interval="prediction")
pred
##         fit       lwr      upr
## 1 0.1543123 -3.185642 3.494266
sum(fit_univ$coefficients*c(1,3.4))
## [1] 0.1543123
ic=predict(fit_univ,interval="confidence")
print(ic[1:5,])
##           fit         lwr        upr
## 1 -0.41346753 -0.72116718 -0.1057679
## 2  0.20424358 -0.14006908  0.5485563
## 3 -0.02960412 -0.31691647  0.2577082
## 4  0.27723443 -0.09224715  0.6467160
## 5 -0.88753758 -1.36903423 -0.4060409
if(!("HH" %in% rownames(installed.packages()))){install.packages("HH")}
library('HH')

HH::ci.plot(fit_univ)

Modèle avec les 4 covariables + intercept

fonction lm

fit = lm(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
summary(fit)
## 
## Call:
## lm(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop, 
##     data = vul)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4518 -0.7673 -0.1513  0.5669  6.2271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.3485     1.5175  -3.524 0.000575 ***
## ln_events     1.3708     0.1792   7.649 3.04e-12 ***
## ln_fert       2.1961     0.4614   4.760 4.81e-06 ***
## hdi           1.9922     1.2628   1.578 0.116928    
## ln_pop       -0.5672     0.1026  -5.529 1.54e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-squared:  0.4221, Adjusted R-squared:  0.4055 
## F-statistic: 25.38 on 4 and 139 DF,  p-value: 8.522e-16
anova(fit)
## Analysis of Variance Table
## 
## Response: ln_death_risk
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## ln_events   1  36.775  36.775  20.186 1.466e-05 ***
## ln_fert     1  71.336  71.336  39.156 4.542e-09 ***
## hdi         1  21.154  21.154  11.611 0.0008576 ***
## ln_pop      1  55.703  55.703  30.575 1.540e-07 ***
## Residuals 139 253.237   1.822                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(fit)
## (Intercept)   ln_events     ln_fert         hdi      ln_pop 
##  -5.3484985   1.3708219   2.1960509   1.9921835  -0.5672164
head(residuals(fit))
##          1          2          3          4          5          6 
## -0.4655284  0.1040698 -0.6106572 -0.9839096  0.1853458 -1.9841708

Diagnostics sur X

Matrice de correlations

Definition de la matrice

X = vul[,c("ln_events","ln_fert","hdi","ln_pop")]
all(X==vul[,c(3:6)])
## [1] TRUE
cor_mat = cor(X)
cov_mat = cov(X)

Calcul des valeurs propres et vecteurs propres

propres = eigen(cor_mat)
propres$values
## [1] 1.9963221 1.5984915 0.2715593 0.1336271
propres$values[1] / propres$values
## [1]  1.000000  1.248879  7.351330 14.939503
propres$values[1] / propres$values[4]
## [1] 14.9395
max(propres$values[1] / propres$values)
## [1] 14.9395

library(HH,quietly = TRUE)

Variance inflation factors

vif(fit)
## ln_events   ln_fert       hdi    ln_pop 
##  2.421759  3.642415  3.767663  2.460624

Les VIF sont tous inférieurs à 5, donc pas de problème de colinéarité.

Calcul des VIFs directement à partir des formules.

R2.1 = summary(lm(ln_events~ln_fert+hdi+ln_pop, data=vul))$r.squared
(VIF1 <- 1/(1 - R2.1))
## [1] 2.421759
R2.2 = summary(lm(ln_fert~ln_events+hdi+ln_pop, data=vul))$r.squared
(VIF2 <- 1/(1 - R2.2))
## [1] 3.642415
R2.3 = summary(lm(hdi~ln_events+ln_fert+ln_pop, data=vul))$r.squared
(VIF3 <- 1/(1 - R2.3))
## [1] 3.767663
R2.4 = summary(lm(ln_pop~ln_events+ln_fert+hdi, data=vul))$r.squared
(VIF4 <- 1/(1 - R2.4))
## [1] 2.460624

Analyse des résidus

Valeurs ajustées \(\hat y\)

yhat = fitted(fit)
#fit$fitted.values

Résidus \(e = y - \hat y\)

e = residuals(fit)
#fit$residuals

Vérification du calcul des résidus à partir des valeurs ajustées. Utilité de la fonction all.equal par rapport à la comparaison directe avec ==.

with(vul, all(ln_death_risk-yhat==residuals(fit)))
## [1] FALSE
with(vul, all.equal(ln_death_risk-yhat,residuals(fit)))
## [1] TRUE

Residus stabdardisés \(e'\)

e_prime = rstandard(fit)

Residus studentisés \(e^\star\)

e_star = rstudent(fit)
lattice::histogram(e_prime-e_star)

boxplot(e_prime-e_star)

Normalité des erreurs

#plot(fit,which=2)
shapiro.test(e)
## 
##  Shapiro-Wilk normality test
## 
## data:  e
## W = 0.94169, p-value = 1.058e-05
qqnorm(e)
qqline(e)

L’asymétrie de la distribution de la racine carré de la valeur absolue des résidus standardisés \(\sqrt{|E|}\) est beaucoup plus faible que celle de la valeur absolue des résidus standardisés \(|E|\) si les \(E\) sont distribuées suivant des lois normales centrées \(N(0,.)\). Utiliser cette racine carrée permet donc d’avoir un indicateur supplémentaire de normalité.

Graphique résidus/valeurs ajustées

plot(fit,which=1)

Graphique scale-location

if(!("lmtest" %in% rownames(installed.packages()))){install.packages("lmtest")}
plot(fit,which=3)

library(lmtest, quietly = TRUE)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
bptest(ln_death_risk~ln_events+ln_fert+hdi+ln_pop+I(ln_events^2)+I(ln_fert^2)+I(hdi^2)+I(ln_pop^2),data = vul)
## 
##  studentized Breusch-Pagan test
## 
## data:  ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop + I(ln_events^2) +     I(ln_fert^2) + I(hdi^2) + I(ln_pop^2)
## BP = 8.3579, df = 8, p-value = 0.3993

##Pas de normalité -> permutations ###Version 1 avec lmPerm

library(lmPerm)
fit_p <- lmp(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
## [1] "Settings:  unique SS : numeric variables centered"
summary(fit_p)
## 
## Call:
## lmp(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop, 
##     data = vul)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4518 -0.7673 -0.1513  0.5669  6.2271 
## 
## Coefficients:
##           Estimate Iter Pr(Prob)    
## ln_events   1.3708 5000   <2e-16 ***
## ln_fert     2.1961 5000   <2e-16 ***
## hdi         1.9922 1143   0.0805 .  
## ln_pop     -0.5672 5000   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-Squared: 0.4221,  Adjusted R-squared: 0.4055 
## F-statistic: 25.38 on 4 and 139 DF,  p-value: 8.522e-16

###Version 2 avec pgirmess

library(pgirmess)
fit_p2 <- PermTest(fit)
print(fit_p2)
## 
## Monte-Carlo test
## 
## Call: 
## PermTest.lm(obj = fit)
## 
## Based on 1000 replicates
## Simulated p-value:
##           p.value
## ln_events   0.000
## ln_fert     0.000
## hdi         0.002
## ln_pop      0.000

Cas avec hétéroscédasticité :

Données simulées

n =50
X=rnorm(n,1)
epsilon = rnorm(n,0,0.5)
Yok = 2 +3* X+epsilon
lmok = lm(Yok~X)

Yhs = 2 +3* X+ abs(X)^2*epsilon
lmhs = lm(Yhs~X)

Graphiques résidus/ajustées

par(mfrow=c(1,2))
plot(lmok,which=1,main="cas ok")
plot(lmhs,which=1,main="cas heteroscedastique")

#plot(lmnl,which=1,main="cas non lineaire")

Graphiques scale/location

par(mfrow=c(1,2))
plot(lmok,which=3,main="cas ok")
plot(lmhs,which=3,main="cas heteroscedastique")

#plot(lmnl,which=3,main="cas non lineaire")

Observations influentes

Où sont les points “aberrants” ?

pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop,  main="Simple Scatterplot Matrix", data=vul, col=(24+36*as.numeric(abs(e_star)>2)))

Leviers, observations influentes

#Illustrer notion de levier

influences = lm.influence(fit)
hat = influences$hat

Graphique des résidus

pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, data=vul, 
      main="Simple Scatterplot Matrix", 
      col=(24+50*as.numeric(hat>(2*4+2) / nrow(vul))))

Graphique DCook

any((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## [1] FALSE
which((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## integer(0)
par(mfrow=c(1,2))
plot(fit, which=4:5)

Graphique Dbetas

dfbetas=apply(abs(influences$coefficients)/influences$sigma,1,`/`,sqrt(c(144,diag(cov_mat))))
colSums(dfbetas>2/nrow(vul))
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   1   1   2   2   2   2   1   2   4   1   2   2   1   3   1   2   0   2   2 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   2   2   2   1   2   3   1   1   1   0   2   1   1   2   1   2   2   2   3   2 
##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
##   3   1   1   1   1   2   1   2   2   4   2   1   3   1   4   4   1   2   1   1 
##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
##   1   2   2   0   1   0   4   3   2   2   1   1   3   1   2   2   1   1   2   3 
##  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
##   1   1   2   3   1   2   2   5   2   2   1   1   3   2   2   2   4   1   2   1 
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
##   1   2   0   2   1   2   2   1   2   2   1   2   2   2   1   1   2   1   1   2 
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 
##   2   2   3   2   1   1   3   0   4   1   2   2   1   2   1   2   2   0   4   2 
## 141 142 143 144 
##   0   1   1   0

Relations non-linéaires

Relation non-linéaire due à une covariable

n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)

Ynl = 2 - X[,1] + 3* X[,2]^2 + epsilon
lmnl = lm(Ynl~X[,1]+X[,2])

par(mfrow=c(1,2))
plot(lmnl, which = 1:2)

par(mfrow=c(1,2))
plot(lmnl, which = 3:4)

library(car)
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## 
## Attaching package: 'car'
## The following objects are masked from 'package:HH':
## 
##     logit, vif
## The following objects are masked from 'package:faraway':
## 
##     logit, vif

crPlots(lmnl)

Transformation de la variable

X2_2 = X[,2]^2
lmnl_2 = lm(Ynl~X[,1]+X2_2)
crPlots(lmnl_2)

Relation non-linéaire due Y

n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)

ln_Y = 1 - X[,1] + 0.1* X[,2] + epsilon
Y = exp(ln_Y)
lm_ln = lm(Y~X[,1]+X[,2])

par(mfrow=c(1,2))
plot(lm_ln, which = 1:2)

par(mfrow=c(1,2))
plot(lm_ln, which = 3:4)

crPlots(lm_ln)

Transformation de Y

lm_ln_trans = lm(log(Y)~X[,1]+X[,2])
plot(lm_ln_trans)

crPlots(lm_ln_trans)

Interpretation : variables discrètes

?mtcars
mtcars_simple = mtcars[c(1,2,6)]
summary(mtcars_simple)
##       mpg             cyl              wt       
##  Min.   :10.40   Min.   :4.000   Min.   :1.513  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:2.581  
##  Median :19.20   Median :6.000   Median :3.325  
##  Mean   :20.09   Mean   :6.188   Mean   :3.217  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:3.610  
##  Max.   :33.90   Max.   :8.000   Max.   :5.424

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt  <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…

mtcars_simple<- dplyr::mutate(mtcars_simple, cyl = factor(cyl))
glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt  <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…

fit_simple = lm(mpg~wt+factor(cyl),data=mtcars_simple)
summary(fit_simple)
## 
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5890 -1.2357 -0.5159  1.3845  5.7915 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.9908     1.8878  18.006  < 2e-16 ***
## wt            -3.2056     0.7539  -4.252 0.000213 ***
## factor(cyl)6  -4.2556     1.3861  -3.070 0.004718 ** 
## factor(cyl)8  -6.0709     1.6523  -3.674 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared:  0.8374, Adjusted R-squared:   0.82 
## F-statistic: 48.08 on 3 and 28 DF,  p-value: 3.594e-11

library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
## 
##     layer

ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
  geom_point() 

fit_croise = lm(mpg ~ wt * cyl, data = mtcars_simple)
summary(fit_croise)
## 
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1513 -1.3798 -0.6389  1.4938  5.2523 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   39.571      3.194  12.389 2.06e-12 ***
## wt            -5.647      1.359  -4.154 0.000313 ***
## cyl6         -11.162      9.355  -1.193 0.243584    
## cyl8         -15.703      4.839  -3.245 0.003223 ** 
## wt:cyl6        2.867      3.117   0.920 0.366199    
## wt:cyl8        3.455      1.627   2.123 0.043440 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared:  0.8616, Adjusted R-squared:  0.8349 
## F-statistic: 32.36 on 5 and 26 DF,  p-value: 2.258e-10

ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
  geom_point() + 
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE)
## `geom_smooth()` using formula 'y ~ x'

ggplot(mtcars_simple, aes(x=wt, y=mpg)) +
  geom_point() + 
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE, formula = y ~ x)

summary(fit_simple)
## 
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5890 -1.2357 -0.5159  1.3845  5.7915 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.9908     1.8878  18.006  < 2e-16 ***
## wt            -3.2056     0.7539  -4.252 0.000213 ***
## factor(cyl)6  -4.2556     1.3861  -3.070 0.004718 ** 
## factor(cyl)8  -6.0709     1.6523  -3.674 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared:  0.8374, Adjusted R-squared:   0.82 
## F-statistic: 48.08 on 3 and 28 DF,  p-value: 3.594e-11
summary(fit_croise)
## 
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1513 -1.3798 -0.6389  1.4938  5.2523 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   39.571      3.194  12.389 2.06e-12 ***
## wt            -5.647      1.359  -4.154 0.000313 ***
## cyl6         -11.162      9.355  -1.193 0.243584    
## cyl8         -15.703      4.839  -3.245 0.003223 ** 
## wt:cyl6        2.867      3.117   0.920 0.366199    
## wt:cyl8        3.455      1.627   2.123 0.043440 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared:  0.8616, Adjusted R-squared:  0.8349 
## F-statistic: 32.36 on 5 and 26 DF,  p-value: 2.258e-10
anova(fit_simple)
## Analysis of Variance Table
## 
## Response: mpg
##             Df Sum Sq Mean Sq  F value    Pr(>F)    
## wt           1 847.73  847.73 129.6650 5.079e-12 ***
## factor(cyl)  2  95.26   47.63   7.2856  0.002835 ** 
## Residuals   28 183.06    6.54                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_croise)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## wt         1 847.73  847.73 141.3883 5.133e-12 ***
## cyl        2  95.26   47.63   7.9443   0.00203 ** 
## wt:cyl     2  27.17   13.58   2.2658   0.12386    
## Residuals 26 155.89    6.00                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_simple,fit_croise)
## Analysis of Variance Table
## 
## Model 1: mpg ~ wt + factor(cyl)
## Model 2: mpg ~ wt * cyl
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     28 183.06                           
## 2     26 155.89  2     27.17 2.2658 0.1239